#!/bin/bash
#
# Formats one hour of raw tracking logs with a Hadoop streaming job and
# registers the job output as a partition of the Hive table
# formatlog.nas_tracking_format_hour.
#
# Usage:
#   run_format_hour_log.sh [YYYYMMDDHH]
#
#   When exactly one argument is given it is used as the target hour;
#   otherwise the previous hour (relative to now) is processed.
#
# Cron example (minute 4 of every hour):
#   4 */1 * * * run_format_hour_log.sh > run_format_hour_log.log 2>&1
#
# Exit status: 0 on success, 1 if any step fails.

# Loaded before enabling 'set -u' because the profile is outside our control
# and may legitimately reference unset variables.
source /etc/profile
set -u

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [[ $# -eq 1 ]]; then
  targetHour=$1
else
  targetHour=$(date -d "-1 hours" +"%Y%m%d%H") || die "cannot compute target hour"
fi

# The value is spliced into HDFS paths, a glob and Hive SQL below, so insist
# on the exact YYYYMMDDHH shape before using it anywhere.
[[ "${targetHour}" =~ ^[0-9]{10}$ ]] \
  || die "invalid target hour '${targetHour}', expected YYYYMMDDHH"

# Directory containing this script, and its parent (which holds lib/).
path=$(cd "$(dirname "$0")" && pwd) || die "cannot resolve script directory"
parentPath=$(dirname "${path}")

targetDate=${targetHour:0:8}
hour=${targetHour:8:2}
outputPath="/user/work/trackingFormatLog/${targetDate}/${hour}"

# Remove output left by a previous run of the same hour. -f keeps a missing
# directory (the normal first-run case) from being reported as an error, so a
# non-zero status here means a real deletion failure.
hadoop fs -rm -r -f "${outputPath:?}" \
  || die "failed to remove existing output ${outputPath}"

# Streaming job. The input glob is quoted so it reaches Hadoop unexpanded
# instead of being matched against the local filesystem.
# NOTE(review): NullValueOutputFormat is presumably provided by NASUDF-0.0.1.jar - confirm.
hadoop jar "${parentPath}/lib/hadoop-streaming-2.7.2.jar" \
  -libjars "${parentPath}/lib/NASUDF-0.0.1.jar" \
  -D mapred.job.name="tracking_log_format_${targetHour}" \
  -outputformat com.nsw.tools.NullValueOutputFormat \
  -file "${path}/format_hour_log_map.py" \
  -mapper "${path}/format_hour_log_map.py" \
  -input "/logs/source/nas/track/${targetDate}/track*${targetHour}*bz2" \
  -output "${outputPath}" \
  || die "streaming job failed for ${targetHour}; Hive partition left untouched"

# Load data into Hive: re-point the (day, hour) partition at the fresh output.
# Only reached when the streaming job succeeded, so the partition never
# references missing or partial data.
hiveSql="use formatlog;alter table nas_tracking_format_hour drop if exists partition (day=${targetDate},hour=${hour});alter table nas_tracking_format_hour add partition(day=${targetDate},hour=${hour}) location '${outputPath}'"
hive -e "${hiveSql}" || die "failed to register Hive partition day=${targetDate},hour=${hour}"